# Merge data to parcel boundaries
pacman::p_load(tidyverse, sf, cowplot, data.table, fst, qs, mapview)

## Preliminaries
rm(list = ls())

source("code/globals.R")

# Load homes data and limit to relevant homes/ counties ----
homes <- read_fst(file.path(WORKING, "data_to_geocode.fst"), as.data.table = T)

# Only geocode single family homes (mobile homes may not be in parcel shapefiles).
singlefamcodes <- c(
  "RR101", # SINGLE FAMILY RESIDENTIAL
  "RR102", # RURAL RESIDENCE
  "RR104", # TOWNHOUSE
  "RR105", # CLUSTER HOME
  "RR106", # CONDOMINIUM
  "RR109", # PLANNED UNIT DEVELOPMENT
  "RR999"
)
homes[, single_family := PropertyLandUseStndCode %in% singlefamcodes]
homes <- homes[homes$single_family == TRUE, ]

relevantcounties <- unique(homes$FIPS[homes$destroyed == TRUE])
homes <- homes[homes$FIPS %in% relevantcounties, ]

# Keep one home record per importparcelid
homes[, minBldg := min(BuildingOrImprovementNumber), by = ImportParcelID]
table(homes$minBldg, useNA = "ifany") # Almost every importparcelid has a building numbered '1'
homes <- homes[BuildingOrImprovementNumber == minBldg, ]
stopifnot(anyDuplicated(homes$ImportParcelID) == 0) #

# Handle duplicated APNs
homes[, copies := .N, by = .(FIPS, UnformattedAssessorParcelNumber)]
dupes <- homes[copies > 1][order(FIPS, UnformattedAssessorParcelNumber)]
table(dupes$countyname, useNA = "ifany") # Mostly in Lake County CA and Larimer County, CO. This is not really a problem -- the merge of homes to parcel polygones will be many-to-many, returning all possible combinations of homes and polygons.
rm(dupes)

homes <- homes[, .(
  FIPS, UnformattedAssessorParcelNumber, ImportParcelID,
  BuildingOrImprovementNumber, PropertyFullStreetAddress,
  PropertyLandUseStndCode, destroyed, incidentid, AssessmentYear,
  State
)]

homes$archivedUAPN <- homes$UnformattedAssessorParcelNumber

# initialize mergerates dataframe
mergerates <- unique(homes[, c("FIPS", "incidentid")]) %>%
  mutate(mergerate = NA)

## Full list of all counties (FIPS) that need to be matched.
fipslist <- as.character(relevantcounties)
for (i in seq(1, length(fipslist))) {
  if (nchar(fipslist[i]) == 4) {
    fipslist[i] <- paste0("0", fipslist[i])
  }
}

fipslist <- fipslist[which(fipslist != "41041")] # No data for Lincoln OR


# Read parcel shapefile for CA -----

# Takes about 30 minutes to load
all_parcels_CA <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "Parcels_CA_2014", "Parcels_CA_2014.gdb"), 
                          layer = "CA_PARCELS_STATEWIDE")


# Read in FIPS crosswalk for CA counties (created by 01_DINS_Import.R)
# Has more counties than previous versions of the code (6001, 6005, 6021, 6067, 6069)
FIPScrosswalk <- read_fst(file.path(WORKING, "county-crosswalk.fst"), as.data.table = T)
FIPScrosswalk[, FIPS := sprintf("%05d", as.numeric(FIPS))]

# Make a list of CA counties for which we will use this omnibus file (as opposed to county-specific data).
FIPScrosswalk[, use_omnibus := FIPS %in% fipslist & 
                !(cname %in% c(
                  "Los Angeles", "Madera", "Orange",
                  "Placer",
                  "San Bernardino", "Santa Cruz",
                  "Shasta", "Ventura"
                ))]


## Function to read parcel boundaries for 1 county
read_county_parcel_lines <- function(this_county) {
  # this_county <- "Butte"
  # END DEBUG
  this_fips <- FIPScrosswalk[cname == this_county, FIPS]
  
  print(sprintf("%s: %s", this_fips, this_county))
  
  this_parcels <- all_parcels_CA %>% filter(County == this_county)
  
  # Attempt to correct invalid polygon shapes (self intersections, slivers, etc)
  # try(this_parcels <- st_cast(this_parcels, "MULTIPOLYGON")) # This could help with WKB type 12 issue, but also fine to leave as is I think
  try(this_parcels <- st_make_valid(this_parcels)) 

  qsave(this_parcels, file.path(WORKING, "lotlines", "orig", paste0(this_fips, ".qs")))

  return(NULL)
}

# Create parcel boundaries for all desired counties
lapply(FIPScrosswalk %>% filter(use_omnibus) %>% pull(cname), read_county_parcel_lines)


# For other counties, read individual parcel data -----

## Los Angeles County ----
losangeles <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_LosAngeles", "LA-fixed.gpkg")) %>% # Uses an LA shapefile that Patrick fixed with a command line tool to remove some invalid geometries
  dplyr::select(APN, SitusAddress, SitusCity, SitusZIP) %>%
  dplyr::rename(PARNO = APN, ADDRESS = SitusAddress, CITY = SitusCity, ZIP = SitusZIP) %>%
  dplyr::mutate(County = "Los Angeles") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(losangeles, file.path(WORKING, "lotlines", "orig", "06037.qs"))
rm(losangeles)

## Madera County ----
madera <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_Madera", "ASR_PARCEL.shp")) %>%
  dplyr::select(Parcel, Situs, SitusCity) %>%
  rename(PARNO = Parcel, ADDRESS = Situs, CITY = SitusCity) %>%
  mutate(ZIP = NA, County = "Madera") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(madera, file.path(WORKING, "lotlines", "orig", "06039.qs"))
rm(madera)

#------- Orange County
# Refer to Orange County data dictionary in containing folder for merging info
orange_county_APNs <- read.csv(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_Orange", "ParcelAttribute.csv", "ParcelAttribute.csv.txt"),
  sep = "|", col.names = c("OBJECTID", "original_PARNO", "parent_parno_condo")
)
condos <- orange_county_APNs[which(orange_county_APNs$OBJECTID == 0), ] %>%
  rename(child_parno_condo = original_PARNO) %>%
  select(child_parno_condo, parent_parno_condo)
notcondos <- orange_county_APNs[which(orange_county_APNs$OBJECTID != 0), ] %>%
  select(OBJECTID, original_PARNO)
crosswalk_objid_apn <- merge(condos, notcondos, by.x = "parent_parno_condo", by.y = "original_PARNO") %>%
  select(OBJECTID, original_PARNO = child_parno_condo) %>%
  rbind(notcondos)

orange <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_Orange", "Orange-fixed.gpkg")) %>%
  st_zm() %>% # Remove the 'Z' dimension
  merge(crosswalk_objid_apn, by = "OBJECTID", all.y = T) %>%
  select(original_PARNO) %>%
  rename(PARNO = original_PARNO) %>%
  dplyr::mutate(County = "Orange", ADDRESS = NA, CITY = NA, ZIP = NA) %>%
  ## st_cast('MULTIPOLYGON') %>%  Fails just for this county; this will not be multipolygon.
  st_transform(crs = 3310) # NAD83 California Albers

qsave(orange, file.path(WORKING, "lotlines", "orig", "06059.qs"))
rm(orange, orange_county_APNs, condos, notcondos, crosswalk_objid_apn)

#---- Placer County
placer <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_Placer", "Parcels", "Parcels.shp")) %>%
  dplyr::select(APN, STREETNUM, STREETNAME, STREETTYPE, CITY, ZIP) %>%
  rename(PARNO = APN) %>%
  mutate(
    ADDRESS = paste0(STREETNUM, STREETNAME, STREETTYPE),
    County = "Placer"
  ) %>%
  dplyr::select(PARNO, ADDRESS, CITY, ZIP, County) %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(placer, file.path(WORKING, "lotlines", "orig", "06061.qs"))
rm(placer)


#---- San Bernardino County
sanb <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_SanBernardino", "countywide_parcels_06_21_2021", "pbmpoly.shp")) %>%
  dplyr::select(APN, NUMBER, PREDIR, STREETNAME, STREETTYPE, CITY, ZIP) %>%
  rename(PARNO = APN) %>%
  mutate(
    ADDRESS = paste0(NUMBER, PREDIR, STREETNAME, STREETTYPE),
    County = "San Bernardino"
  ) %>%
  dplyr::select(PARNO, ADDRESS, CITY, ZIP, County) %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

# ggplot(data=sanb)+geom_sf()
qsave(sanb, file.path(WORKING, "lotlines", "orig", "06071.qs"))
rm(sanb)

#----- Santa Cruz
santacruz <- read_sf(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_SantaCruz", "Assessor_Parcels.shp")) %>%
  select(APN, SITEADD, SITCITY, SITZIP) %>%
  rename(PARNO = APN, ADDRESS = SITEADD, CITY = SITCITY, ZIP = SITZIP) %>%
  mutate(County = "Santa Cruz") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310)

qsave(santacruz, file.path(WORKING, "lotlines", "orig", "06087.qs"))
rm(santacruz)

#----- Shasta County
shasta <- read_sf(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_Shasta", "Parcels.shp")) %>%
  dplyr::rename(PARNO = ASMT) %>%
  dplyr::mutate(County = "Shasta", ADDRESS = NA, CITY = NA, ZIP = NA) %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

# ggplot(data=shasta)+geom_sf()
qsave(shasta, file.path(WORKING, "lotlines", "orig", "06089.qs"))
rm(shasta)

#------- Ventura County   #SHOULD YOU BE USING APN10 in this county lot line file?
ventura <- read_sf(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "California_Ventura", "parcel.shp")) %>%
  dplyr::rename(PARNO = APN) %>%
  dplyr::mutate(County = "Ventura", ADDRESS = NA, CITY = NA, ZIP = NA) %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

# ggplot(data=ventura)+geom_sf()
qsave(ventura, file.path(WORKING, "lotlines", "orig", "06111.qs"))
rm(ventura)

#-------------------------------------------------------------------------------------------#
#-------------- Pre-process county shapefiles for non-CA counties---------------------------#
#-------------------------------------------------------------------------------------------#

# ----- Arizona -----------------------
yv <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Arizona_Yavapai/BoomhowerData.gdb/Data.gdb")) %>%
  rename(PARNO = PARNUMASR) %>%
  select(PARNO) %>%
  mutate(ADDRESS = NA, CITY = NA, ZIP = NA, County = "Yavapai") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(yv, file.path(WORKING, "lotlines", "orig", "04025.qs"))
rm(yv)

#----- Colorado -----------------------------------------------------
bou <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Colorado_Boulder/Parcels.shp")) %>%
  rename(PARNO = PARCEL_NO) %>%
  select(PARNO) %>%
  mutate(ADDRESS = NA, CITY = NA, ZIP = NA, County = "Boulder") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(bou, file.path(WORKING, "lotlines", "orig", "08013.qs"))
rm(bou)


elpaso <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Colorado_ElPaso/parcels1foot_public.shp")) %>%
  rename(PARNO = PARCEL, ADDRESS = PLOC) %>%
  select(PARNO, ADDRESS) %>%
  mutate(CITY = NA, ZIP = NA, County = "El Paso") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(elpaso, file.path(WORKING, "lotlines", "orig", "08041.qs"))
rm(elpaso)


grand <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Colorado_Grand/Parcels.shp")) %>%
  rename(PARNO = PIN, ADDRESS = GIS_ADD) %>%
  select(PARNO, ADDRESS) %>%
  mutate(CITY = NA, ZIP = NA, County = "Grand") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(grand, file.path(WORKING, "lotlines", "orig", "08049.qs"))
rm(grand)

larimer <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Colorado_Larimer/ParcelOwner.shp")) %>%
  rename(PARNO = PARCELNUM, ADDRESS = LOCADDRESS, CITY = LOCCITY, ZIP = LOCZIPCODE) %>%
  select(PARNO, ADDRESS, CITY, ZIP) %>%
  mutate(County = "Larimer") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(larimer, file.path(WORKING, "lotlines", "orig", "08069.qs"))
rm(larimer)

# ------- Oregon ---------------------------------------------------------
marion <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Oregon_Marion/Parcels.shp")) %>%
  select(ALT_TAXACC, SITUS, SITUSCSZ) %>%
  rename(PARNO = ALT_TAXACC, ADDRESS = SITUS, CITY = SITUSCSZ) %>%
  mutate(ZIP = NA, County = "Marion") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(marion, file.path(WORKING, "lotlines", "orig", "41047.qs"))
rm(marion)


jackson <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Oregon_Jackson/Tax_Lots.shp")) %>%
  rename(PARNO = ACCOUNT, ADDRESS = SITEADD) %>%
  select(PARNO, ADDRESS) %>%
  mutate(CITY = NA, ZIP = NA, County = "Jackson") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(jackson, file.path(WORKING, "lotlines", "orig", "41029.qs"))
rm(jackson)

lane <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Oregon_Lane/LaneCountyTaxlots.shp")) %>%
  rename(PARNO = MAPTAXLOT) %>% # this is different than APN but we merge it to OldParcelNumber in merge file - kevin
  select(PARNO) %>%
  mutate(ADDRESS = NA, CITY = NA, ZIP = NA, County = "Lane") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(lane, file.path(WORKING, "lotlines", "orig", "41039.qs"))
rm(lane)

# ------- Washington ---------------------------------------------------------
# I found two sets of local parcel files for Okanogan with different vintages which give better merge rates than
#   using a fixed date. - Kevin

ok2012 <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Washington_Okanogan_2012/2012-10-24_landuse.shp")) %>%
  rename(PARNO = PIN) %>%
  select(PARNO) %>%
  mutate(ADDRESS = NA, CITY = NA, ZIP = NA, County = "Okanogan") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(ok2012, file.path(WORKING, "lotlines", "orig", "53047_2012.qs"))
rm(ok2012)


ok2019 <- st_read(file.path(INPUT_PRIVATE, "ParcelBoundaryFiles", "OtherSources", "Washington_Okanogan_2019/2019-11-10_Landuse.shp")) %>%
  rename(PARNO = PIN) %>%
  select(PARNO) %>%
  mutate(ADDRESS = NA, CITY = NA, ZIP = NA, County = "Okanogan") %>%
  st_cast("MULTIPOLYGON") %>%
  st_transform(crs = 3310) # NAD83 California Albers

qsave(ok2019, file.path(WORKING, "lotlines", "orig", "53047_2019.qs"))
rm(ok2019)


################################################################################################
############ HAND-CORRECT APN FORMATTING BY COUNTY AND MERGE PARCEL POLYGONS TO HOMES DATA #####
################################################################################################

#---------------------------------- Fixing merge field ---------------------------------------------------
# Read and return the parcel file of a given FIPS.
# Note this function is subtly different from the one by the same name in MatchParcelHomeAPNs:
#   this one by default reads the original (unmodified) parcel file, and only reads the fixed one
#   with the argument fixed=TRUE
# By default removes hyphens after copying the original formatting into a column "original_PARNO"
readParcelFile <- function(FIPS) {
  parcels <- qread(file.path(WORKING, "lotlines", "orig", paste0(FIPS, ".qs")))
  parcels$original_PARNO <- parcels$PARNO
  # parcels <- st_transform(parcels, crs=thisCRS)
  parcels$PARNO <- gsub("-", "", parcels$PARNO, fixed = TRUE)
  return(parcels)
}

checkNums <- function(fips) {
  if (length(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & homes$destroyed == TRUE]) == 0) {
    return("No destroyed homes in this county")
  }
  print("APNs in the homes file")
  print(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)])
  print("Parcel Shapefile APNs")
  p <- readParcelFile(fips)
  print(p$PARNO)
  print(paste0(length(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & homes$destroyed == TRUE]), " destroyed homes"))
}


### Use these functions to see which counties require multiple "mergegroups" due to changes in format of UAPN across ZTRAX versions
getYears <- function(fips) {
  table(homes$AssessmentYear[homes$FIPS == as.numeric(fips)])
  table(
    nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]),
    homes$AssessmentYear[homes$FIPS == as.numeric(fips)]
  )
}

getInc <- function(fips) {
  table(homes$AssessmentYear[homes$FIPS == as.numeric(fips)])
  table(
    nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]),
    homes$incidentid[homes$FIPS == as.numeric(fips)]
  )
}


## 4 counties, Siskiyou, San Luis Obispo, Plumas, and Mariposa have zero records in the statewide parcel data.
# Siskiyou is only 9 destroyed homes and Plumas and Mariposa are only 1 each, but SLO is about 170. Can ultimately try to bring in other
# parcel data source for SLO (and Siskiyou). Unfortunately SLO data is not available from the county.
# See: https://www.slocounty.ca.gov/Departments/Information-Technology/Services/GIS-Services/GIS-Parcel-Data.aspx.
# Siskiyou data are available and I save in the "other states parcel shapefile" data folder.
nrow(readParcelFile("06093"))
nrow(readParcelFile("06079"))
nrow(readParcelFile("06063"))
nrow(readParcelFile("06043"))
## - Remove these counties from the list to be processed
fipslist <- fipslist[-which(fipslist == "06093")]
fipslist <- fipslist[-which(fipslist == "06079")]
fipslist <- fipslist[-which(fipslist == "06063")]
fipslist <- fipslist[-which(fipslist == "06043")]

# Contra Costa is unworkable; remove from list to be processed.
fipslist <- fipslist[-which(fipslist == "06013")]
# There are only 5 homes in this county, all with very different UAPNs than the parcels in the area




#------------------------------------------#
#------- MORE COMPLICATED COUNTIES --------#
#------------------------------------------#
## These have changes in the formatting of UAPN across years in the Zillow data, making cleanup more laborious.

fips <- "06007" # Butte
getYears(fips)
getInc(fips)
## Append '000' to the 9-digit UAPNs in the homes data.
p <- readParcelFile(fips)
table(substr(p$PARNO, 10, 12), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9], "000")
rm(p)


fips <- "06033" ### Somewhat more complicated than most
#* Some of the UAPNs in the homes data have 10-digits, unlike most which have 8 and the parcel shapefile which also has 8.
#* These extra 2 characters are ALWAYS '01' for this county.
#* I investigate whether trimming those two digits creates any duplication, then I do it (recognizing that duplicates are ok in the join to parcels below, which creates all pairwise combinations of parcel polygons and  homes records.
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "always")
table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 10], 9, 10), useNA = "ifany") # the extra 2 digits are always "01' in 10-digit UAPNs
table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9], 9, 9), useNA = "ifany") # in the small handful of 9-digit UAPNs, these are all over the place. Less than 1% of homes though (82 out of >20,000)
sum(duplicated(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)])) # Already a little duplication even before trimming
#* Implement the edit to homes data.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)] <- substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], 1, 8)
sum(duplicated(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)])) # small increase in dupes after trimming
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06039" # Madera
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
## The parcel file APNs have 9 chars.
## Most UAPNs in the homes data have 9 chars, but this 1 incident has 12-char UAPNs that all end in '000'. I strip the '000' from these homes records.
homes$UnformattedAssessorParcelNumber[homes$FIPS == 6039 & homes$incidentid == "CAFKU 013369"]
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
## Strip '000' from the 12-digit UAPNs in the homes data
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12] <-
  substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12], 1, 9)
rm(p)



fips <- "06045" ### Somewhat more complicated than most
## -- Parcel shapefile UAPNs have 8 digits, homes UAPNs have either 8 or 10. No re-arrangements to the APNs can perfectly solve the matching problem.
## I trim the final 2 numbers of UAPN. these trimmed UAPNs still uniquely identify ALMOST all homes. There are 8 duplicate records (16 duplicated).
#* This is OK -- the join between homes and parcels will create all possible combinations of homes observations and parcel observations, then
#* assign locations to ImportParcelIDs (which are still unique).
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "always")
table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 10], 9, 10), useNA = "ifany") ## a Mix of '00' and '01'
#* Implement the edit to homes data.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)] <- substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], 1, 8)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)



fips <- "06053"
getYears(fips)
getInc(fips)
## Append '000' to the 9-digit UAPNs in the homes data.
p <- readParcelFile(fips)
table(substr(p$PARNO, 10, 12), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9], "000")
rm(p)


fips <- "06055" # Napa
getYears(fips)
getInc(fips)
## Append '000' to the 9-digit UAPNs in the homes data.
p <- readParcelFile(fips)
table(substr(p$PARNO, 10, 12), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9], "000")
rm(p)



fips <- "06057" # Nevada DONE
#* PARNO in the parcel shapefile has 10 digits. The last 3 digits are always '000'.
#* UAPNs in the homes data have either 7 or 12.
#* For the UAPNs with 7 digits, I add '000' at the end.
#* For the UAPNs with 12 digits, there are 2 EXTRA zeros that have been inserted at position 1 and 7. I remove these.
#* I end up in all cases with 10-digit UAPNs with high merge success. May be lots of duplicates, however.
getYears(fips)
getInc(fips)
# PARNO is broken, says "Placer" despite County being "Nevada"
p <- readParcelFile(fips)
p <- p[1434:dim(p)[[1]], ] # gets rid of all the weird data
table(substr(p$PARNO, 8, 10))
#-- Add 000 to the 7-digit homes
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 7] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 7], "000")
#-- For the 12-digit homes, remove the extra 0 in the 1st and 7th position. (Commented out code checks which zero to remove)
# a<-homes[homes$FIPS==as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber)== 12]
# a1<-a
# a1$UnformattedAssessorParcelNumber<-paste0(substr(a$UnformattedAssessorParcelNumber,2,6),substr(a$UnformattedAssessorParcelNumber,8,12))
# a2<-a
# a2$UnformattedAssessorParcelNumber<-paste0(substr(a$UnformattedAssessorParcelNumber,2,5),substr(a$UnformattedAssessorParcelNumber,7,12))
# nrow(merge(a1,p,by.x='UnformattedAssessorParcelNumber',by.y='PARNO'))
# nrow(merge(a2,p,by.x='UnformattedAssessorParcelNumber',by.y='PARNO'))
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12] <-
  paste0(
    substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12], 2, 6),
    substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12], 8, 12)
  )
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06061" # Placer
#* Some of the UAPNs in the homes data have 12 digits, unlike most which have 9.
#*  Parcel shapefile APNS have 12 digits, but last 3 are always zero.
#* The added 3 characters in the homes data are usually 5X0, where X can vary. Dropping these 5X0 suffixes does create some duplicates.
p <- readParcelFile(fips)
getInc(fips)
getYears(fips)
for (i in 10:12) {
  print(table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], i, i)))
  print(table(substr(p$PARNO, i, i)))
}
sum(duplicated((substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], 1, 9))))
# Strip the last 3 chars from 12-char homes UAPNs
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12] <-
  substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12], 1, 9)
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "ifany")
# Then append 3 zeros onto all homes UAPNs
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)] <- paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], "000")
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06071" # San Bernardino
p <- readParcelFile(fips)
# Some records (mostly newer fires) have 13 characher APNs, others (older fires) have 9-character APNS. Parcel shapefile has 9-character APNS.
getInc(fips)
# When positions 10--13 are population in the homes APN, they are reliably zero. Drop with no consequence.
for (i in 10:13) {
  print(table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], i, i)))
}
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)] <-
  substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], 1, 9)
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06073" #  San Diego
#* For this county, the parcel shapefile APNs have 10 digits. More recent ZTRAX records also have 10 digits, but older records (for 2003 and 2007 fires) have 8 digits.
#* There are no positions in the 8-digit APNs where zeros or other chars can be inserted to fully solve this problem on all records.
#* However, appending "00" to the end of the 8-digit APN leads to a unique match in the parcel 98% of the time -- see testing with random draw of homes below.
#* (technically, this test shows that appending the zeros almost always yields same result as stripping the final two characters off the parcel shapefile UAPN).
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(substr(p$PARNO, 9, 10), useNA = "ifany")

#-- make this edit to the 8-digit UAPNs in the homes data.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8], "00")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06089"
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(substr(p$PARNO, 9, 12), useNA = "ifany")
## In the parcel UAPN, positions 6,7, 10, 11, and 12 are reliably 0.
for (j in 1:12) {
  print(j)
  print(table(substr(p$PARNO, j, j), useNA = "ifany"))
}
## In the 8-digit homes UAPN, position 6 is reliably zero.
for (j in 1:8) {
  print(j)
  print(table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8], j, j), useNA = "ifany"))
}
## - For 8-digit UAPNs in homes data, Stick another 0 after position 6, and append '000' to end.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8] <-
  paste0(
    substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8], 1, 6),
    "0",
    substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8], 7, 8),
    "000"
  )
table(nchar(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)]), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06097"
## -- Parcel UAPNs are 9 digits. Homes UAPNs are either 9 or 12. For the 12-digit home UAPNs, last three digits always '000'. Drop with no consequence.
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12], 10, 12)) ## the extra chars are always '000'.
## -- edit the homes data to drop the last 3 digits of the 12-digit UAPNs.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)] <-
  substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips)], 1, 9)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06103" # Tehama
# Homes UAPNs have 8 chars, Parcel shapefile APNs have 9 chars. Last digit in the parcel shapefile is usually a 1 but can take on other values. Dropping the last digit creates some duplicated parcel shapefile records.
p <- readParcelFile(fips)
getInc(fips)
getYears(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(substr(p$PARNO, 9, 9))
p$PARNO <- substr(p$PARNO, 1, 8)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06105" # Trinity
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(substr(p$PARNO, 9, 10), useNA = "ifany") ## 99% of these are '00'
table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 10], 9, 10), useNA = "ifany") ## these also almost always '00'
#-- edit 8-digit UAPNs in the homes data by sticking on '00' at end.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 8], "00")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


fips <- "06115" # Yuba
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
table(substr(p$PARNO, 10, 12), useNA = "ifany") ## 99.9% of these are '000'
table(substr(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 12], 10, 12), useNA = "ifany") ## 100% of these are '000'.
#-- edit 9-digit UAPNs in the homes data by sticking on '000' at end.
homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9] <-
  paste0(homes$UnformattedAssessorParcelNumber[homes$FIPS == as.numeric(fips) & nchar(homes$UnformattedAssessorParcelNumber) == 9], "000")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)



#------------------------------#
#------- EASY COUNTIES --------#
#------------------------------#
# Consistent UAPN formatting across years and incidents within the county.

fips <- "06009" # Calaveras DONE
p <- readParcelFile(fips)
p$PARNO <- paste0("0", p$PARNO)
# Some *Parcels* have "ROW" at the end of the PARNO -> I think these are easements
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06017" # El Dorado There are only 4 homes in this county? DONE
p <- readParcelFile(fips)
p$PARNO <- paste0(p$PARNO, "100")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06019" # Fresno
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06029" # no changes needed?
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06035" # no changes needed?
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06037" # Los Angeles  -- no changes needed?
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
# weird_PARNO <- "2580018114"#here is an example of an APN that gives us the weird boundary
## Some others: "2580018154" "2580018155" "2580018156" "2580018157"
rm(p)

fips <- "06051" # no changes needed
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06059"
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06065" # low merge rate, not sure why.
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06077" # San Joaquin
p <- readParcelFile(fips)
p$PARNO <- paste0(p$PARNO, "0000")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06081" # San Mateo
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06083" #  #no changes needed
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06087" # Santa Cruz  - ## no changes needed??
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06095" #  #no changes needed
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06099" # Stanislaus
p <- readParcelFile(fips)
p$PARNO <- paste0(p$PARNO, "000")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06107" # Tulare
p <- readParcelFile(fips)
p$PARNO <- paste0(p$PARNO, "000")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06111" #  #no changes needed
getYears(fips)
getInc(fips)
p <- readParcelFile(fips)
table(nchar(p$PARNO), useNA = "ifany")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "06113" # Yolo DONE
p <- readParcelFile(fips)
p$PARNO <- gsub(" ", "", p$PARNO, fixed = TRUE)
p$PARNO <- paste0(substr(p$PARNO, 1, 6), "0", substr(p$PARNO, 7, 8), "000")
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

#------------------------------------------------#
#----- OUTSIDE CALIFORNIA -----------------------#
#------------------------------------------------#

fips <- "04025" # Arizona Yavapai
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "08041" # Colorado El Paso
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "08049" # Colorado Grand
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)



fips <- "08069" # Colorado Larimer
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "41047" # Oregon Marion
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "41029" # Oregon Jackson
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "41039" # Oregon Lane
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
#### Modify UAPN in the homes data to use 'OldParcelNumber' for this county only
f <- read_fst(file.path(WORKING, "attr_or_20_cleaned.fst"), as.data.table = T) %>%
  group_by(ImportParcelID) %>%
  arrange(ImportParcelID, BuildingOrImprovementNumber) %>%
  dplyr::filter(row_number() == 1) %>%
  dplyr::select(ImportParcelID, OldParcelNumber)
homes <- homes %>%
  merge(f, by = "ImportParcelID", all.x = TRUE) %>%
  mutate(UnformattedAssessorParcelNumber = if_else(FIPS == "41039",
    substr(gsub("-", "", OldParcelNumber, fixed = TRUE), 1, 13),
    UnformattedAssessorParcelNumber
  )) %>%
  dplyr::select(-OldParcelNumber)
rm(p, f)

fips <- "53047_2012" # Washington OKanogan 2012 file
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)

fips <- "53047_2019" # Washington OKanogan 2019 file
p <- readParcelFile(fips)
qsave(p, file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))
rm(p)


#-----------------------------------------------------#
#------- Match APNs between homes and parcels --------#
#-----------------------------------------------------#

# Read in the parcels data, subset the homes in that county, then merge
# Plot the results for each merge as a check


mergeHomesToParcels <- function(fips) {
  # fips <- "08049" # DEBUG
  countyparcels <- qread(file.path(WORKING, "lotlines", "fixed", paste0(fips, "_fixed.qs")))

  inclist <- homes %>%
    dplyr::filter(FIPS == as.numeric(fips)) %>%
    group_by(incidentid) %>%
    dplyr::select(incidentid) %>%
    dplyr::filter(row_number() == 1)

  mergerates <- data.frame(matrix(data = NA, nrow = length(inclist$incidentid), ncol = 4))
  names(mergerates) <- c("FIPS", "incidentid", "mergerate", "numhomes")
  mergerates$FIPS <- fips
  mergerates$incidentid <- inclist$incidentid

  # Initialize an empty sf object to hold merged results
  merged <- countyparcels[0, ] %>%
    select(PARNO) %>%
    mutate(FIPS = NA, ImportParcelID = NA, incidentid = NA)
  
  

  for (i in 1:nrow(inclist)) {
    subhomes <- homes[FIPS == as.numeric(fips) & incidentid == inclist$incidentid[i], ]
    k <- merge_incident(homesdata = subhomes, parcelshapefile = countyparcels)
    merged <- rbind(merged, k)
    mergerate <- length(unique(k$ImportParcelID)) / length(unique(subhomes$ImportParcelID))
    mergerates$mergerate[mergerates$incidentid == inclist$incidentid[i]] <- mergerate
    mergerates$numhomes[mergerates$incidentid == inclist$incidentid[i]] <- length(unique(subhomes$ImportParcelID))
    rm(k, mergerate)
  }
  
  checkratio <- nrow(merged) / nrow(homes[FIPS == as.numeric(fips), ])
  if (abs(1 - checkratio) > 0.1) {
    print("Big difference in number of rows between original and merged")
  }

  qsave(merged, file.path(WORKING, "lotlines", "merged", paste0("merged", fips, ".qs")))

  return(mergerates)
}

merge_incident <- function(homesdata, parcelshapefile) {
  df <- parcelshapefile %>%
    merge(homesdata,
          by.x = "PARNO",
          by.y = "UnformattedAssessorParcelNumber"
    ) %>% # all.y=TRUE
    arrange(ImportParcelID) %>%
    dplyr::select(FIPS, ImportParcelID, incidentid, PARNO)
  return(df)
}


# Create a fipslist to run this over -- remove Okanogan, WA because requires special handling below
modifiedfipslist <- c(fipslist[which(fipslist != "53047")])

allmergerates <- data.table(FIPS = character(), incidentid = character(), mergerate = numeric(), numhomes = numeric())

for (w in seq(1, length(modifiedfipslist))) {
  print(modifiedfipslist[w])
  rates <- mergeHomesToParcels(modifiedfipslist[w])
  allmergerates <- rbind(allmergerates, rates)
}
rm(w, rates)

## Okanogan WA requires some special handling because of the two parcel files for different years
okanoganhandler <- function(year) {
  okng <- qread(file.path(WORKING, "lotlines", "fixed", paste0("53047_", year, "_fixed.qs")))
  qsave(okng, file.path(WORKING, "lotlines", "fixed", paste0("53047_fixed.qs"))) # temporarily save a file for Okngn using this year
  m <- mergeHomesToParcels("53047")
  temp <- qread(file.path(WORKING, "lotlines", "merged", "merged53047.qs")) # read the merged results file
  qsave(temp, file.path(WORKING, "lotlines", "merged", paste0("merged53047_", year, ".qs")))
  unlink(file.path(WORKING, "lotlines", "fixed", paste0("53047_fixed.qs")))
  return(m)
}
ok2012mrates <- okanoganhandler(2012) %>%
  dplyr::filter(incidentid %in% c(
    "CarltonComplex_53047_2014",
    "OkanoganComplex_53047_2015"
  ))
ok2019mrates <- okanoganhandler(2019) %>%
  dplyr::filter(incidentid == "ColdSprings_53047_2020")
allmergerates <- rbind(allmergerates, ok2012mrates, ok2019mrates)
rm(ok2012mrates, ok2019mrates)
### combine 2012 and 2019 merged data files for okngn
a <- qread(file.path(WORKING, "lotlines", "merged", "merged53047_2012.qs")) %>% # read the merged results file
  dplyr::filter(incidentid %in% c(
    "CarltonComplex_53047_2014",
    "OkanoganComplex_53047_2015"
  ))
b <- qread(file.path(WORKING, "lotlines", "merged", "merged53047_2019.qs")) %>% # read the merged results file
  dplyr::filter(incidentid == "ColdSprings_53047_2020")
c <- rbind(a, b)
qsave(c, file.path(WORKING, "lotlines", "merged", "merged53047.qs"))
unlink(file.path(WORKING, "lotlines", "merged", "merged53047_2012.qs"))
unlink(file.path(WORKING, "lotlines", "merged", "merged53047_2019.qs"))
rm(a, b, c)

write.csv(allmergerates, file.path(WORKING, "mergerates_homes_to_lotlines.csv"))

### Save a final fipslist that I use to identify counties to analyze in 3e (merge footprints to parcel boundaries)
qsave(fipslist, file.path(WORKING, "fips_to_merge_to_fprints.qs"))
